See tokenizer on Wiktionary
{
"etymology_templates": [
{
"args": {
"1": "en",
"2": ":af",
"3": "tokenize",
"4": "-er<id:agent noun>",
"text": "+",
"tree": "1"
},
"expansion": "Etymology tree\nEnglish tokenize\nProto-Indo-European *-yósder.\nProto-Italic *-āzijos\nLatin -āriusnom.\nLatin -āriusbor.\nProto-Germanic *-ārijaz\nProto-West Germanic *-ārī\nOld English -ere\nMiddle English -ere\nEnglish -er\nEnglish tokenizer\n[Appendix:Glossary#loanword|Borrowed]] from\", \"terms\" : [ { \"id\" : \"agent noun\", \"children\" : [ { \"keyword_abbrev\" : \"nom.\", \"keyword_label\" : \"Nominalization of\", \"terms\" : [ { \"id\" : \"adjective\", \"children\" : [ { \"terms\" : [ { \"id\" : \"adjective\", \"children\" : [ { \"keyword_abbrev\" : \"der.\", \"keyword_label\" : \"Derived from\", \"terms\" : [ { \"id\" : \"suffix\", \"children\" : [ ], \"status\" : \"ok\", \"lang_name\" : \"Proto-Indo-European\", \"term\" : \"*-yós\", \"lang\" : \"ine-pro\" } ], \"keyword\" : \"derived\" } ], \"status\" : \"ok\", \"lang_name\" : \"Proto-Italic\", \"term\" : \"*-āzijos\", \"lang\" : \"itc-pro\" } ], \"keyword_label\" : \"Inherited from\", \"keyword\" : \"inherited\" } ], \"status\" : \"ok\", \"lang_name\" : \"Latin\", \"term\" : \"-ārius\", \"lang\" : \"la\" } ], \"keyword\" : \"nominalization\" } ], \"status\" : \"ok\", \"lang_name\" : \"Latin\", \"term\" : \"-ārius\", \"lang\" : \"la\" } ], \"keyword\" : \"bor\" } ], \"status\" : \"ok\", \"lang_name\" : \"Proto-Germanic\", \"term\" : \"*-ārijaz\", \"lang\" : \"gem-pro\" } ], \"keyword_label\" : \"Inherited from\", \"keyword\" : \"inherited\" } ], \"status\" : \"ok\", \"lang_name\" : \"Proto-West Germanic\", \"term\" : \"*-ārī\", \"lang\" : \"gmw-pro\" } ], \"keyword_label\" : \"Inherited from\", \"keyword\" : \"inherited\" } ], \"status\" : \"ok\", \"lang_name\" : \"Old English\", \"term\" : \"-ere\", \"lang\" : \"ang\" } ], \"keyword_label\" : \"Inherited from\", \"keyword\" : \"inherited\" } ], \"status\" : \"ok\", \"lang_name\" : \"Middle English\", \"term\" : \"-ere\", \"lang\" : \"enm\" } ], \"keyword_label\" : \"Inherited from\", \"keyword\" : \"inherited\" } ], \"status\" : \"ok\", \"lang_name\" : \"English\", \"term\" : \"-er\", \"lang\" : \"en\" } ], \"keyword_label\" : \"From\", \"is_group\" : true, \"keyword\" : \"affix\" } ], \"lang_name\" : \"English\", \"term\" : \"tokenizer\", \"status\" : \"ok\", \"lang\" : \"en\" }\" data-lang=\"en\" data-title=\"tokenizer\">\nFrom tokenize + -er.",
"name": "ety"
}
],
"etymology_text": "Etymology tree\nEnglish tokenize\nProto-Indo-European *-yósder.\nProto-Italic *-āzijos\nLatin -āriusnom.\nLatin -āriusbor.\nProto-Germanic *-ārijaz\nProto-West Germanic *-ārī\nOld English -ere\nMiddle English -ere\nEnglish -er\nEnglish tokenizer\nFrom tokenize + -er.",
"forms": [
{
"form": "tokenizers",
"tags": [
"plural"
]
}
],
"head_templates": [
{
"args": {},
"expansion": "tokenizer (plural tokenizers)",
"name": "en-noun"
}
],
"lang": "English",
"lang_code": "en",
"pos": "noun",
"senses": [
{
"categories": [
{
"kind": "other",
"name": "English entries with etymology texts",
"parents": [],
"source": "w"
},
{
"kind": "other",
"name": "English entries with etymology trees",
"parents": [],
"source": "w"
},
{
"kind": "other",
"name": "English entries with incorrect language header",
"parents": [],
"source": "w"
},
{
"kind": "other",
"name": "English terms suffixed with -er (agent noun)",
"parents": [],
"source": "w"
},
{
"kind": "other",
"name": "Pages using etymon with no ID",
"parents": [],
"source": "w"
},
{
"kind": "other",
"name": "Pages with 1 entry",
"parents": [],
"source": "w"
},
{
"kind": "other",
"name": "Pages with entries",
"parents": [],
"source": "w"
},
{
"kind": "other",
"name": "Pages with etymology trees",
"parents": [],
"source": "w"
},
{
"kind": "other",
"langcode": "en",
"name": "Computing",
"orig": "en:Computing",
"parents": [],
"source": "w"
}
],
"glosses": [
"A system that parses an input stream into its component tokens."
],
"id": "en-tokenizer-en-noun-hbixPV~W",
"links": [
[
"computing",
"computing#Noun"
],
[
"system",
"system"
],
[
"parse",
"parse"
],
[
"input",
"input"
],
[
"stream",
"stream"
],
[
"token",
"token"
]
],
"raw_glosses": [
"(computing) A system that parses an input stream into its component tokens."
],
"topics": [
"computing",
"engineering",
"mathematics",
"natural-sciences",
"physical-sciences",
"sciences"
]
}
],
"word": "tokenizer"
}
{
"etymology_templates": [
{
"args": {
"1": "en",
"2": ":af",
"3": "tokenize",
"4": "-er<id:agent noun>",
"text": "+",
"tree": "1"
},
"expansion": "Etymology tree\nEnglish tokenize\nProto-Indo-European *-yósder.\nProto-Italic *-āzijos\nLatin -āriusnom.\nLatin -āriusbor.\nProto-Germanic *-ārijaz\nProto-West Germanic *-ārī\nOld English -ere\nMiddle English -ere\nEnglish -er\nEnglish tokenizer\n[Appendix:Glossary#loanword|Borrowed]] from\", \"terms\" : [ { \"id\" : \"agent noun\", \"children\" : [ { \"keyword_abbrev\" : \"nom.\", \"keyword_label\" : \"Nominalization of\", \"terms\" : [ { \"id\" : \"adjective\", \"children\" : [ { \"terms\" : [ { \"id\" : \"adjective\", \"children\" : [ { \"keyword_abbrev\" : \"der.\", \"keyword_label\" : \"Derived from\", \"terms\" : [ { \"id\" : \"suffix\", \"children\" : [ ], \"status\" : \"ok\", \"lang_name\" : \"Proto-Indo-European\", \"term\" : \"*-yós\", \"lang\" : \"ine-pro\" } ], \"keyword\" : \"derived\" } ], \"status\" : \"ok\", \"lang_name\" : \"Proto-Italic\", \"term\" : \"*-āzijos\", \"lang\" : \"itc-pro\" } ], \"keyword_label\" : \"Inherited from\", \"keyword\" : \"inherited\" } ], \"status\" : \"ok\", \"lang_name\" : \"Latin\", \"term\" : \"-ārius\", \"lang\" : \"la\" } ], \"keyword\" : \"nominalization\" } ], \"status\" : \"ok\", \"lang_name\" : \"Latin\", \"term\" : \"-ārius\", \"lang\" : \"la\" } ], \"keyword\" : \"bor\" } ], \"status\" : \"ok\", \"lang_name\" : \"Proto-Germanic\", \"term\" : \"*-ārijaz\", \"lang\" : \"gem-pro\" } ], \"keyword_label\" : \"Inherited from\", \"keyword\" : \"inherited\" } ], \"status\" : \"ok\", \"lang_name\" : \"Proto-West Germanic\", \"term\" : \"*-ārī\", \"lang\" : \"gmw-pro\" } ], \"keyword_label\" : \"Inherited from\", \"keyword\" : \"inherited\" } ], \"status\" : \"ok\", \"lang_name\" : \"Old English\", \"term\" : \"-ere\", \"lang\" : \"ang\" } ], \"keyword_label\" : \"Inherited from\", \"keyword\" : \"inherited\" } ], \"status\" : \"ok\", \"lang_name\" : \"Middle English\", \"term\" : \"-ere\", \"lang\" : \"enm\" } ], \"keyword_label\" : \"Inherited from\", \"keyword\" : \"inherited\" } ], \"status\" : \"ok\", \"lang_name\" : \"English\", \"term\" : \"-er\", \"lang\" : \"en\" } ], \"keyword_label\" : \"From\", \"is_group\" : true, \"keyword\" : \"affix\" } ], \"lang_name\" : \"English\", \"term\" : \"tokenizer\", \"status\" : \"ok\", \"lang\" : \"en\" }\" data-lang=\"en\" data-title=\"tokenizer\">\nFrom tokenize + -er.",
"name": "ety"
}
],
"etymology_text": "Etymology tree\nEnglish tokenize\nProto-Indo-European *-yósder.\nProto-Italic *-āzijos\nLatin -āriusnom.\nLatin -āriusbor.\nProto-Germanic *-ārijaz\nProto-West Germanic *-ārī\nOld English -ere\nMiddle English -ere\nEnglish -er\nEnglish tokenizer\nFrom tokenize + -er.",
"forms": [
{
"form": "tokenizers",
"tags": [
"plural"
]
}
],
"head_templates": [
{
"args": {},
"expansion": "tokenizer (plural tokenizers)",
"name": "en-noun"
}
],
"lang": "English",
"lang_code": "en",
"pos": "noun",
"senses": [
{
"categories": [
"English countable nouns",
"English entries with etymology texts",
"English entries with etymology trees",
"English entries with incorrect language header",
"English lemmas",
"English nouns",
"English terms suffixed with -er (agent noun)",
"Pages using etymon with no ID",
"Pages with 1 entry",
"Pages with entries",
"Pages with etymology trees",
"en:Computing"
],
"glosses": [
"A system that parses an input stream into its component tokens."
],
"links": [
[
"computing",
"computing#Noun"
],
[
"system",
"system"
],
[
"parse",
"parse"
],
[
"input",
"input"
],
[
"stream",
"stream"
],
[
"token",
"token"
]
],
"raw_glosses": [
"(computing) A system that parses an input stream into its component tokens."
],
"topics": [
"computing",
"engineering",
"mathematics",
"natural-sciences",
"physical-sciences",
"sciences"
]
}
],
"word": "tokenizer"
}
Download raw JSONL data for tokenizer meaning in All languages combined (3.7kB)
This page is a part of the kaikki.org machine-readable All languages combined dictionary. This dictionary is based on structured data extracted on 2026-06-07 from the enwiktionary dump dated 2026-06-01 using wiktextract (e79dea5 and 7f4db16). The data shown on this site has been post-processed and various details (e.g., extra categories) removed, some information disambiguated, and additional data merged from other sources. See the raw data download page for the unprocessed wiktextract data.
If you use this data in academic research, please cite Tatu Ylonen: Wiktextract: Wiktionary as Machine-Readable Structured Data, Proceedings of the 13th Conference on Language Resources and Evaluation (LREC), pp. 1317-1325, Marseille, 20-25 June 2022. Linking to the relevant page(s) under https://kaikki.org would also be greatly appreciated.